# Frequencies, cumulative frequencies and relative frequencies for each
# category (factor level) of a variable, followed by multi-way counts
# returned as data frames.

# Load the data ----

# Read the tab-delimited sample data set.
mydataframe <- read.delim(
  "http://www.robin-beaumont.co.uk/virtualclassroom/stats/basics/coursework/data/pain_medication.dat",
  header = TRUE
)
# Local version (ignore):
# mydataframe <- read.delim(
#   "D:\\web_sites_mine\\HIcourseweb new\\book2data\\pain_medication.dat",
#   header = TRUE
# )
# names(mydataframe)

# Inspect the structure and contents of the data.
# Explicit print() calls are used throughout so the results are also shown
# when the script is run with source().
str(mydataframe)
print(mydataframe)

# Frequency tables for a single variable ----

# Count of observations in each dosage category.
freq_dosagetable <- table(mydataframe$dosage)
print(freq_dosagetable)

# cumsum() provides the cumulative (running) total of the counts.
cum_freq <- cumsum(freq_dosagetable)
print(cum_freq)

# The total number of observations is also needed. table() drops missing
# values by default, so only non-missing values are counted here; otherwise
# the relative frequencies would not sum to 1 when dosage contains NA.
totalcount <- sum(!is.na(mydataframe$dosage))
print(totalcount)

# The relative frequency is the count in each category (freq_dosagetable)
# divided by the total.
rel_freq <- freq_dosagetable / totalcount
print(rel_freq)

# The cumulative relative frequency is the cumulative count divided by the
# total.
cum_rel_freq <- cum_freq / totalcount
print(cum_rel_freq)

# Counts as a data frame (plyr) ----

# plyr::count() returns the frequencies as a data frame and can also count
# combinations of several categorical variables.
# Install plyr only when it is missing, rather than on every run.
if (!requireNamespace("plyr", quietly = TRUE)) {
  install.packages("plyr", dependencies = TRUE)
}
library(plyr)

print(count(mydataframe, c("dosage", "health")))
# produces
#   dosage health freq
# 1   High   Fair   34
# 2   High   Good   42
# 3   High   Poor   12
# 4    Low   Fair   47
# 5    Low   Good   49
# 6    Low   Poor   16

print(count(mydataframe, c("dosage", "health", "gender")))
# produces
#    dosage health gender freq
# 1    High   Fair Female   14
# 2    High   Fair   Male   20
# 3    High   Good Female   20
# 4    High   Good   Male   22
# 5    High   Poor Female    8
# 6    High   Poor   Male    4
# 7     Low   Fair Female   23
# 8     Low   Fair   Male   24
# 9     Low   Good Female   25
# 10    Low   Good   Male   24
# 11    Low   Poor Female   11
# 12    Low   Poor   Male    5